package com.leadingsoft.controller.parse;

import java.util.List;

import org.beetl.sql.core.SQLManager;
import org.cef.callback.CefStringVisitor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.leadingsoft.common.model.Hotel;
import com.leadingsoft.common.model.HotelLog;
import com.leadingsoft.controller.parse.impl.GetCtripHotels;

/**
 * @ClassName ParsingHtmlDocument
 * @Description Example of parsing an HTML page.<br>
 *              The page-parsing routine is invoked from {@link #visit(String)} to extract the data.<br>
 *              Example site: Ctrip hotel information.
 * @author gongym
 * @date 2018-06-09 22:30:09
 */
public class ParsingHtmlDocument implements CefStringVisitor {
	private final String url;
	private final String listSelector;
	private final SQLManager sqlManager;

	/**
	 * Creates a visitor bound to one page.
	 *
	 * @param url          URL stored in (and looked up from) the {@link HotelLog} table to detect
	 *                     pages that were already processed
	 * @param listSelector class name handed to {@code Document.getElementsByClass} to locate the
	 *                     elements that are converted into hotels
	 * @param sqlManager   SQL manager used for the log lookup and for both inserts
	 */
	public ParsingHtmlDocument(String url, String listSelector, SQLManager sqlManager) {
		this.url = url;
		this.listSelector = listSelector;
		this.sqlManager = sqlManager;
	}

	/**
	 * Parses the delivered page source and stores the hotels it contains, unless the URL has
	 * already been logged as processed.
	 *
	 * @param string HTML source of the page; when {@code null} the call is a no-op and the URL is
	 *               not logged
	 */
	@Override
	public void visit(String string) {
		if (null == string) {
			// Nothing to parse. The URL is deliberately left unlogged so a later visit can retry it.
			return;
		}
		// Check the log first: parsing the document is pointless for an already-processed URL.
		if (isAlreadySpidered()) {
			return;
		}
		Document htmlDocument = Jsoup.parse(string);
		Elements listElements = htmlDocument.getElementsByClass(listSelector);
		GetCtripHotels hotelExtractor = new GetCtripHotels();
		List<Object> hotelList = hotelExtractor.elementsToObjects(listElements);
		sqlManager.insertBatch(Hotel.class, hotelList);
		recordSpideredUrl();
	}

	/** Returns whether a {@link HotelLog} row matching this visitor's URL already exists. */
	private boolean isAlreadySpidered() {
		HotelLog hotelLogTemplate = new HotelLog();
		hotelLogTemplate.setSpideredUrl(url);
		return null != sqlManager.templateOne(hotelLogTemplate);
	}

	/** Inserts a {@link HotelLog} row marking this visitor's URL as processed. */
	private void recordSpideredUrl() {
		HotelLog nowHotelLog = new HotelLog();
		nowHotelLog.setSpideredUrl(url);
		sqlManager.insert(nowHotelLog);
	}
}
